﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;

namespace 字符串的提取
{
    class Program
    {
        /// <summary>
        /// Reads the HTML file "1.htm" from the current working directory, finds
        /// every anchor element that carries a double-quoted href attribute, and
        /// prints each link target together with the anchor's inner HTML.
        /// Waits for a key press before exiting so the console window stays open.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string text = File.ReadAllText("1.htm");

            // Flatten line breaks so an anchor whose markup spans several lines can
            // still be matched: '.' does not match '\n' by default. Strings are
            // immutable, so the result of Replace must be assigned back; calling
            // Replace without using its return value leaves the text unchanged.
            text = text.Replace("\r", " ").Replace("\n", " ");

            // Capture groups used below: 4 = href value, 6 = inner HTML of the anchor.
            // RegexOptions.Multiline is not needed: it only alters '^' and '$',
            // neither of which appears in the pattern.
            MatchCollection col = Regex.Matches(
                text,
                "<a(.+?)href(\\s)*=(\\s)*\"(.+?)\"(.*?)>(.+?)</a>",
                RegexOptions.IgnoreCase);

            foreach (Match match in col)
            {
                Console.WriteLine("href={0}  innerhtml={1}", match.Groups[4].Value, match.Groups[6].Value);
            }

            Console.ReadKey();
        }
    }
}
